/* Table 1 */


#delimit ;

* Session setup: never pause for more prompts and start from an empty session;
set more off;
clear all;

* Stem of the text file that receives the regression table;
local outfile "GWgapr10_indivIDIb";

* Write a blank line followed by the run date and time;
display _newline "$S_DATE $S_TIME";






********************************************************************************;
* Using the HLFS extract to list the industries (pf_ind) that mostly employ
* full time, saved as temp_ft_indys;
********************************************************************************;

use GWgap_pr_HLFS_v4, clear;

* Only rows with a recorded value of female and a non-empty industry are kept;
keep if female!=. & pf_ind!="";

* Mean main-job HLFS hours within each female by year by industry cell;
collapse (mean) hrs_main_HLFS, by(female year pf_ind);

* One row per year and industry, with a separate hours column per value of female;
reshape wide hrs, i(year pf_ind) j(female);

* Suffix 0 (female equal to 0) becomes the m column and suffix 1 the f column;
rename hrs_main_HLFS0 av_hrs_m;
rename hrs_main_HLFS1 av_hrs_f;

* Threshold flags for each group - a flag is left missing when the mean hours
* for that group are missing;
foreach sex in m f {;
	foreach cut in 30 35 {;
		gen o`cut'_`sex' = av_hrs_`sex' >=`cut' if av_hrs_`sex'<.;
	};
};

* Combined flag: 1 only when the m flag and the f flag are both 1, otherwise 0;
foreach cut in 30 35 {;
	gen o`cut'_b = o`cut'_m==1 & o`cut'_f==1;
};

* Per industry, the number of years in which the combined flag was set;
collapse (sum) o30_b o35_b, by(pf_ind);

* Keep pf_inds in which both men and women work a mean of 30 or more hours each
* week in their main HLFS job in exactly 10 years (presumably every year in
* the HLFS extract - confirm against the data);
keep if o30_b==10;

keep pf_ind;
drop if missing(pf_ind);

save temp_ft_indys, replace;






********************************************************************************;
* Preparing the analysis data from the IDI extract;
********************************************************************************;

* Load only the variables that are used further down;
use snz_uid age max_gross_earn_yr female year max_fte_employee_av
	max_mon_wkd ind4 ffe_m L_hc Q_rest hp_pent lnIDI_earnhp
	using GWgap_pr_IDI_v4, clear;


* Attach pf_ind from the firm file. The merge key there is called pent, so
* hp_pent is renamed for the merge and renamed back afterwards;

rename hp_pent pent;
merge m:1 pent year using GWgap_pr_firm_v4,
	keep(master match) keepusing(pf_ind) nogenerate;
rename pent hp_pent;

/*
* merging on full time industry list;

merge m:1 pf_ind using temp_ft_indys, keep(master match) gen(ft_i);
gen ft_indy = ft_i==3;
drop ft_i;
label var ft_indy "Full-time industry";
*/

* Attach Q_rest_HLFS from the HLFS extract - unmatched rows keep it missing;

merge 1:1 snz_uid hp_pent year using GWgap_pr_HLFS_v4,
	keep(master match) keepusing(Q_rest_HLFS) nogenerate;


* Derived regressors;

generate age2 = age^2/100;
label variable age2 "Age squared (/100)";

generate lnL_hc = ln(L_hc);


* Identifiers for combinations of year with other categorical variables;

egen ind4_yr = group(ind4 year);
label variable ind4_yr "Groups defined by 4-digit ANZSIC industry and financial year";

egen firm_yr = group(hp_pent year);
label variable firm_yr "Groups defined by firm and year";

* ffe_m interacted with an indicator for each year from 2002 to 2016;
foreach yr of numlist 2002/2016 {;
	generate ffe_m_y`yr' = ffe_m*(year==`yr');
};

rename ind4 ind4u;


* Consecutive integer identifiers for persons and firms;

egen person_id = group(snz_uid);

egen firm_id = group(hp_pent);

generate lnmax_fte_employee_av = ln(max_fte_employee_av);
label variable lnmax_fte_employee_av "FTEs at highest paying pent (ln)";





********************************************************************************;
* creating macros: dependent variable, indicator rows for the output table,
* regressor lists, absorbed effects and sample conditions for specifications
* 1 to 8;
********************************************************************************;

* Dependent variable and its variable label;
local dvar lnIDI_earnhp;
local dvarl: var label `dvar';

* Rows that estout collapses into a single indicator line each - the part after
* the equals sign is the coefficient name pattern that the row stands for;
local indicators `"
	"Year FE = *year*"
	"Industry FE (4-digit ANZSIC) = *ind4u*"
	
	"'; * "CCK firm FE for males * Year FE = *ffe_m_y*";



*** regression controls;

* c1 is the common regressor list and already contains the year dummies, so
* the other lists only add what is specific to them and never repeat i.year;
local c1 "female lnmax_fte_employee_av age age2 i.year"; * for full IDI sample;
local c2 "`c1'"; * for restricted IDI sample;
local c3 "`c1'"; * for restricted HLFS sample;
local c4 "`c1' i.ind4u"; * restricted IDI;
local c5 "`c1'"; * absorbing ind4_yr, restricted IDI sample;
local c6 "`c1'"; * absorbing firm FE, restricted IDI sample;
local c7 "`c1'"; * absorbing firm_yr, restricted IDI sample;
local c8 "`c1' ffe_m"; * restricted IDI;

* Variable absorbed by areg in specifications 5 to 7;
local absvar5 ind4_yr;
local absvar6 firm_id;
local absvar7 firm_yr;

* Estimation sample condition for each specification;
local samplec1 "female!=.";
local samplec2 "Q_rest==1";
local samplec3 "Q_rest_HLFS==1";
local samplec4 "Q_rest==1";
local samplec5 "Q_rest==1";
local samplec6 "Q_rest==1";
local samplec7 "Q_rest==1";
local samplec8 "Q_rest==1";





* Drop variables that no regressor list or sample condition above refers to,
* then shrink storage types;
drop max_gross_earn_yr max_mon_wkd lnL_hc;

compress;

* person_id now stands in for snz_uid;
drop snz_uid;




********************************************************************************;
* running regs: estimates specifications 1 to 8 for each set of sample years
* and appends the resulting table to the outfile text file;
********************************************************************************;

* Start from a fresh output file - capture ignores the error if none exists;
capture erase "`outfile'.txt";

	foreach years in 2002_16 {; * looping over sample years to include;
	
		* yearsc is the year condition applied to every regression and yearsl
		* is a readable description of it (not written to the output);
		if "`years'"=="2002_16" {;
			local yearsc "year>=2002 & year<=2016";
			local yearsl "Financial years 2002-2016 (except HLFS specification, which is 2007-2016)";
		};
	
		eststo clear;
		local headers "";

		forvalues i = 1/8 {;
			* Specifications 5 to 7 absorb a fixed effect with areg and are
			* headed by the absorbed variable. All others use reg and are
			* headed by their number;
			if `i'>=5 & `i'<=7 {;
				areg `dvar' `c`i'' if `samplec`i'' & `yearsc', vce(cluster person_id) absorb(`absvar`i'');
				local headers `" `headers' "`absvar`i'' FE" "';
			};
			else {;
				reg `dvar' `c`i'' if `samplec`i'' & `yearsc', vce(cluster person_id);
				local headers `" `headers' "(`i')" "';
			};
			* next line, if uncommented, confidentialises observation count in output;
			*rndobs N, seed(0871525);
			eststo;
		};


		* One table with all stored estimates. Variable labels come from the
		* label option and coefficients keep their estimation order;
		estout * using "`outfile'.txt", append
			style(tab) 
			c(b(star fmt(%9.3f)) se(par))
			legend label collabels(, none)
			dropped ("dropped")
			drop(_cons )
			mlabels(`headers')
			indicate(`indicators')
			starlevels(* 0.05 ** 0.01)
			stats(r2 N, 
				fmt(%9.3f %9.0fc ) 
			labels("R-Squared" Observations ))
			prehead("`=char(13)'`=char(13)'
			`=char(13)'**`years'****************************************************")
			varwidth(50) modelwidth(11) delimiter("");
			
	};

